Version: 1.0 (Jupytext, time measurements, logger)
Income data is an artificial data set created for (time series) regression problems.
There are two types of data there:
NOTE:
There are even whole train/test data sets present for both categories.
All data:
p
data_gen = IncomeWeatherDataGenerator()
(X_train, X_test), (Y_train_reg, Y_test_reg), (Y_train_bin, Y_test_bin), (Y_train_ter, Y_test_ter), \
(Y_train_ter_oh, Y_test_ter_oh) = data_gen.generate_basic_ml_data()
(X_train, X_test), (Y_train_reg, Y_test_reg), (Y_train_bin, Y_test_bin), (Y_train_ter, Y_test_ter), \
(Y_train_ter_oh, Y_test_ter_oh) = data_gen.generate_multidim_ml_data()
Regression:
p
data_gen = IncomeWeatherDataGenerator()
(X_train, X_test), (Y_train, Y_test), _, _, _ = data_gen.generate_basic_ml_data()
(X_train, X_test), (Y_train, Y_test), _, _, _ = data_gen.generate_multidim_ml_data()
Binary Classification:
p
data_gen = IncomeWeatherDataGenerator()
(X_train, X_test), _, (Y_train, Y_test), _, _ = data_gen.generate_basic_ml_data()
(X_train, X_test), _, (Y_train, Y_test), _, _ = data_gen.generate_multidim_ml_data()
Tertiary Classification Dense:
p
data_gen = IncomeWeatherDataGenerator()
(X_train, X_test), _, _, (Y_train, Y_test), _ = data_gen.generate_basic_ml_data()
(X_train, X_test), _, _, (Y_train, Y_test), _ = data_gen.generate_multidim_ml_data()
Tertiary Classification One Hot:
p
data_gen = IncomeWeatherDataGenerator()
(X_train, X_test), _, _, _, (Y_train, Y_test) = data_gen.generate_basic_ml_data()
(X_train, X_test), _, _, _, (Y_train, Y_test) = data_gen.generate_multidim_ml_data()
import sys
import os
# Append the directories one and two levels above the working directory to the import path.
_cwd = os.getcwd()
sys.path.extend(os.path.join(_cwd, levels_up) for levels_up in ("..", "../.."))
ToC
Necessary libraries for notebook functionality:
NOTE: This way, using the function, the button works only in an active notebook. If the functionality needs to be preserved in the HTML export, the code has to be included directly in the notebook.
from src.utils.notebook_support_functions import create_button, get_notebook_name
from src.utils.logger import Logger
from src.utils.envs import Envs
from src.utils.config import Config
from pandas import options
from IPython.display import display, HTML
Constants for overall behaviour.
# Names of the logger and python configurations handed to Envs below.
LOGGER_CONFIG_NAME = "logger_file_console" # default
PYTHON_CONFIG_NAME = "python_personal" # default
# Switches for the optional notebook cosmetics (button creation, full-width layout).
CREATE_BUTTON = False
ADDAPT_WIDTH = False
NOTEBOOK_NAME = get_notebook_name()
# Let pandas show up to 500 rows / columns before truncating the display.
options.display.max_rows = 500
options.display.max_columns = 500
# Register the chosen logger and config, then start the notebook-wide timer
# (it is stopped by Logger().end_timer() at the end of the notebook).
envs = Envs()
envs.set_logger(LOGGER_CONFIG_NAME)
envs.set_config(PYTHON_CONFIG_NAME)
Logger().start_timer(f"NOTEBOOK; Notebook name: {NOTEBOOK_NAME}")
# Optional notebook cosmetics; both switches are defined with the other constants of this cell.
if CREATE_BUTTON:
    create_button()
if ADDAPT_WIDTH:
    # Inject CSS so the notebook container uses the full browser width.
    display(HTML("<style>.container { width:100% !important; }</style>"))  # notebook width
A: ../../configurations\logger_file_console.conf 2023-06-05 15:07:28,542 - git.util - DEBUG - Failed checking if running in CYGWIN due to: FileNotFoundError(2, 'The system cannot find the file specified', None, 2, None) 2023-06-05 15:07:28,544 - file_console - DEBUG - Logger was created on JIRI-A in branche 012_check_rerun_and_save_all_notebooks. 2023-06-05 15:07:28,545 - file_console - DEBUG - Process: NOTEBOOK; Notebook name: income_weather_data_generator_documentation.py; Timer started;
from collections import Counter
from pprint import pprint

from numpy import array, ndarray
from pandas import options

from src.data.df_explorer import DFExplorer
from src.data.income_weather_data_generator import IncomeWeatherDataGenerator, ATTR_OUTPUT, ATTR_DATE
from src.visualisations.plotly_time_series import PlotlyTimeSeries
# from src.global_constants import * # Remember to import only the constants in use
# Notebook-level constants.
# NOTE(review): none of these three names is referenced in the visible part of
# the notebook - confirm they are still needed.
N_ROWS_TO_DISPLAY = 2
FIGURE_SIZE_SETTING = {"autosize": False, "width": 2200, "height": 750}
DATA_PROCESSING_CONFIG_NAME = "data_processing_basic"
The beta parameters follow the order of the attributes in the encoded data frame:
Column RANDOM is not used when generating the output. It is just random noise, added to confuse the regression and to test feature importance.
# Parameters passed to IncomeWeatherDataGenerator.generate() below.
start_date = "2018-01-01"  # first DATE value of the generated frame
n = 40  # number of generated rows (the resulting frames have 40 rows)
# One coefficient per encoded attribute; RANDOM has none, hence 12 values for 13 attributes.
betas = [30, 2, 1, 4, 3, 6, -1, -3, 0, -10, 25, 10]
sigma = 10  # presumably the noise level of the generated output - TODO confirm in the generator
# Sanity check: number of coefficients.
len(betas)
12
# Generate the data set and log intermediate timings before and after the call.
Logger().set_meantime("Generating Starts")
data_gen = IncomeWeatherDataGenerator()
# Returns the raw frame, the encoded frame and the multi-dimensional X / Y arrays.
df_data, df_data_transformed, X_multi, Y_multi = data_gen.generate(start_date, betas, n, sigma)
Logger().set_meantime("Generating Ends")
2023-06-05 15:07:28,700 - file_console - DEBUG - Process: NOTEBOOK; Notebook name: income_weather_data_generator_documentation.py; Timer meantime; Meantime of: Generating Starts; Duration [s]: 0.16; Duration [m]: 0.0 2023-06-05 15:07:28,711 - file_console - DEBUG - Process: NOTEBOOK; Notebook name: income_weather_data_generator_documentation.py; Timer meantime; Meantime of: Generating Ends; Duration [s]: 0.01; Duration [m]: 0.0
# Show how many attributes the multi-dimensional data has, followed by their names.
attr_names = data_gen.get_attributes_names_multi()
print(len(attr_names), attr_names, sep="\n")
13 ['TEMPERATURE', 'DAY_OF_WEEK_NUM_0', 'DAY_OF_WEEK_NUM_1', 'DAY_OF_WEEK_NUM_2', 'DAY_OF_WEEK_NUM_3', 'DAY_OF_WEEK_NUM_4', 'DAY_OF_WEEK_NUM_5', 'DAY_OF_WEEK_NUM_6', 'WEATHER_cloud', 'WEATHER_rain', 'WEATHER_sun', 'WEATHER_wind', 'RANDOM']
# Display the weights of the multi-dimensional generator (a 20 x 13 integer array in this run).
data_gen.get_weights_multi()
array([[ 0, 82, 70, 21, 1, 24, 26, -10, 86, 48, 33, 81, 81],
[-10, 47, 45, 39, 76, 83, 94, 68, 23, 23, 22, 84, 17],
[-18, -9, -2, 52, -9, 86, 44, 89, 12, 47, 72, -20, 90],
[ 33, 55, 21, 7, 19, 78, 81, 17, 62, 45, 58, 56, -6],
[ 99, 73, 57, 20, 85, 21, 10, 81, 4, 32, 18, -3, 65],
[ 17, 16, 23, 31, 18, 10, 38, 32, -13, 55, 60, 36, 33],
[ 88, 23, 29, 41, 95, 2, 42, 6, 94, -20, 57, 96, 93],
[ 15, -20, 16, 41, 44, 79, 47, -1, -5, 45, 94, -2, 93],
[-10, -11, 86, 48, 60, 48, 9, 93, 37, 63, 85, 7, 98],
[ 59, 28, 21, 65, 21, 34, 44, 82, 39, -8, -7, -1, 36],
[ 87, 5, 61, 78, 49, 80, 19, 11, 43, -19, 53, 17, 93],
[ 79, 96, -19, 38, 40, -6, 58, 20, 91, 67, 19, 99, -15],
[ 77, 40, 54, 10, 73, 53, -7, 53, -12, 29, 3, 64, 77],
[ 5, -17, -12, 17, 98, 6, 45, 98, -20, 38, 14, 98, 51],
[ 62, 49, 21, 96, 58, -4, 46, 75, 96, -2, -10, 46, -5],
[ 26, -1, 69, 19, 25, 12, 28, -6, 43, 45, 6, 22, 35],
[ 10, 12, -14, 2, 22, 49, 6, 98, 65, 14, 94, 85, 52],
[ 58, 77, 57, 63, 42, 46, 99, 43, 59, 54, -9, -8, 29],
[ 22, 18, 38, 44, 75, 93, 95, 9, 99, 70, 20, 3, 51],
[ 79, 53, 79, 7, -13, 43, 59, -13, 15, -10, 11, 69, 14]])
def print_info(array_matrix: ndarray) -> None:
    """Print a short summary of an array.

    Prints, in this order, the shape of the whole array, the shape of its
    first element along axis 0, and that first element itself (pretty-printed).

    Args:
        array_matrix: Array with at least one entry along its first axis.

    Raises:
        IndexError: If the array has no first element to show.
    """
    print(array_matrix.shape)
    first_entry = array_matrix[0]
    print(first_entry.shape)
    pprint(first_entry)
# Summary of the multi-dimensional inputs; in this run the shape is (21, 20, 13).
print_info(X_multi)
(21, 20, 13)
(20, 13)
array([[ 26.30123686, 0. , 1. , 0. ,
0. , 0. , 0. , 0. ,
0. , 0. , 1. , 0. ,
8.42387195],
[ 12.53152482, 0. , 0. , 1. ,
0. , 0. , 0. , 0. ,
0. , 0. , 0. , 1. ,
10.15623474],
[ 16.88287117, 0. , 0. , 0. ,
1. , 0. , 0. , 0. ,
0. , 0. , 0. , 1. ,
5.37934559],
[ 23.36006694, 0. , 0. , 0. ,
0. , 1. , 0. , 0. ,
0. , 0. , 0. , 1. ,
-7.97323233],
[ 17.66663471, 0. , 0. , 0. ,
0. , 0. , 1. , 0. ,
0. , 1. , 0. , 0. ,
1.10374089],
[ 25.47780775, 0. , 0. , 0. ,
0. , 0. , 0. , 1. ,
0. , 1. , 0. , 0. ,
9.57396932],
[ 18.54946332, 1. , 0. , 0. ,
0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. ,
2.57076162],
[ 15.38873426, 0. , 1. , 0. ,
0. , 0. , 0. , 0. ,
0. , 0. , 1. , 0. ,
-10.90720137],
[ 17.81007228, 0. , 0. , 1. ,
0. , 0. , 0. , 0. ,
0. , 0. , 0. , 1. ,
9.57394438],
[ 23.03349522, 0. , 0. , 0. ,
1. , 0. , 0. , 0. ,
0. , 0. , 0. , 1. ,
-12.455361 ],
[ 27.27151619, 0. , 0. , 0. ,
0. , 1. , 0. , 0. ,
0. , 0. , 0. , 1. ,
-4.34301439],
[ 16.18792431, 0. , 0. , 0. ,
0. , 0. , 1. , 0. ,
0. , 0. , 1. , 0. ,
3.68621572],
[ 15.22283418, 0. , 0. , 0. ,
0. , 0. , 0. , 1. ,
0. , 1. , 0. , 0. ,
-3.15842204],
[ 19.13825404, 1. , 0. , 0. ,
0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. ,
-8.03491056],
[ 30.65096906, 0. , 1. , 0. ,
0. , 0. , 0. , 0. ,
0. , 1. , 0. , 0. ,
-13.60260239],
[ 20.49227047, 0. , 0. , 1. ,
0. , 0. , 0. , 0. ,
0. , 0. , 0. , 1. ,
-6.38054417],
[ 34.00006379, 0. , 0. , 0. ,
1. , 0. , 0. , 0. ,
1. , 0. , 0. , 0. ,
-7.96685064],
[ 25.42992178, 0. , 0. , 0. ,
0. , 1. , 0. , 0. ,
0. , 1. , 0. , 0. ,
-0.97627406],
[ 30.8360921 , 0. , 0. , 0. ,
0. , 0. , 1. , 0. ,
0. , 0. , 1. , 0. ,
-21.0868599 ],
[ 33.06156142, 0. , 0. , 0. ,
0. , 0. , 0. , 1. ,
1. , 0. , 0. , 0. ,
-3.36931825]])
# Summary of the matching targets; in this run the shape is (21, 1).
print_info(Y_multi)
(21, 1) (1,) array([17971.07603628])
# First rows of the raw data frame (WEATHER is still a text column here).
df_data.head()
| DATE | WEATHER | TEMPERATURE | RANDOM | OUTPUT | |
|---|---|---|---|---|---|
| 0 | 2018-01-01 | sun | 26.301237 | 8.423872 | 807.522026 |
| 1 | 2018-01-02 | wind | 12.531525 | 10.156235 | 392.427469 |
| 2 | 2018-01-03 | wind | 16.882871 | 5.379346 | 526.368373 |
| 3 | 2018-01-04 | wind | 23.360067 | -7.973232 | 715.231729 |
| 4 | 2018-01-05 | rain | 17.666635 | 1.103741 | 523.338592 |
# Shape of the encoded data frame.
df_data_transformed.shape
(40, 14)
# First rows of the encoded data frame (day of week and weather as indicator columns).
df_data_transformed.head()
| TEMPERATURE | DAY_OF_WEEK_NUM_0 | DAY_OF_WEEK_NUM_1 | DAY_OF_WEEK_NUM_2 | DAY_OF_WEEK_NUM_3 | DAY_OF_WEEK_NUM_4 | DAY_OF_WEEK_NUM_5 | DAY_OF_WEEK_NUM_6 | WEATHER_cloud | WEATHER_rain | WEATHER_sun | WEATHER_wind | RANDOM | OUTPUT | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 26.301237 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 8.423872 | 807.522026 |
| 1 | 12.531525 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 10.156235 | 392.427469 |
| 2 | 16.882871 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 5.379346 | 526.368373 |
| 3 | 23.360067 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | -7.973232 | 715.231729 |
| 4 | 17.666635 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1.103741 | 523.338592 |
df_explorer = DFExplorer()
ts_visu = PlotlyTimeSeries()
# Build the date-indexed output series explicitly and plot that very object.
# Re-reading df_data[ATTR_OUTPUT] after assigning an index to a previously
# fetched column only carries the dates when pandas hands back its cached
# column object, and it leaves a date-indexed column cached inside df_data.
# set_index() returns a new frame, so df_data itself stays untouched.
ts = df_data.set_index(ATTR_DATE)[ATTR_OUTPUT]
ts_visu.plot(
    series=[ts],
    plot_title="Weather Output Variable",
    y_title="Profit"
)
# Overview of the raw data frame: type, shape, dtypes, head and description.
df_explorer.print_info_about_data_frame(df=df_data)
DataFrame type: <class 'pandas.core.frame.DataFrame'>
DataFrame shape: (40, 5)
DataFrame dtypes: {'DATE': 'datetime64[ns]', 'WEATHER': 'object', 'TEMPERATURE': 'float64', 'RANDOM': 'float64', 'OUTPUT': 'float64'}
DataFrame head:
DATE WEATHER TEMPERATURE RANDOM OUTPUT
0 2018-01-01 sun 26.301237 8.423872 807.522026
1 2018-01-02 wind 12.531525 10.156235 392.427469
2 2018-01-03 wind 16.882871 5.379346 526.368373
3 2018-01-04 wind 23.360067 -7.973232 715.231729
4 2018-01-05 rain 17.666635 1.103741 523.338592
DataFrame description:
TEMPERATURE RANDOM OUTPUT
count 40.000000 40.000000 40.000000
mean 21.078239 -1.882978 640.120576
std 6.419081 8.819785 193.694019
min 10.056193 -23.149646 312.619056
25% 16.709134 -7.968446 517.410262
50% 19.833784 -0.545387 607.037082
75% 25.429544 4.239111 752.048488
max 34.000064 13.524814 1040.510861
# Per-attribute statistics of the raw data frame (type, null count, unique values).
df_explorer.print_attr_stats(df=df_data)
Attribute Name: DATE Attribute type: datetime64[ns] Number of Null values: 0 Number of unique values is:40 Percentage of unique values is: 1.0 ############################################# Attribute Name: WEATHER Attribute type: object Number of Null values: 0 Number of unique values is:4 Percentage of unique values is: 0.1 Summation of unique values per ID: wind 12 sun 10 rain 9 cloud 9 Name: WEATHER, dtype: int64
############################################# Attribute Name: TEMPERATURE Attribute type: float64 Number of Null values: 0 Number of unique values is:40 Percentage of unique values is: 1.0
############################################# Attribute Name: RANDOM Attribute type: float64 Number of Null values: 0 Number of unique values is:40 Percentage of unique values is: 1.0
############################################# Attribute Name: OUTPUT Attribute type: float64 Number of Null values: 0 Number of unique values is:40 Percentage of unique values is: 1.0
#############################################
# Overview of the encoded data frame: type, shape, dtypes, head and description.
df_explorer.print_info_about_data_frame(df=df_data_transformed)
DataFrame type: <class 'pandas.core.frame.DataFrame'>
DataFrame shape: (40, 14)
DataFrame dtypes: {'TEMPERATURE': 'float64', 'DAY_OF_WEEK_NUM_0': 'uint8', 'DAY_OF_WEEK_NUM_1': 'uint8', 'DAY_OF_WEEK_NUM_2': 'uint8', 'DAY_OF_WEEK_NUM_3': 'uint8', 'DAY_OF_WEEK_NUM_4': 'uint8', 'DAY_OF_WEEK_NUM_5': 'uint8', 'DAY_OF_WEEK_NUM_6': 'uint8', 'WEATHER_cloud': 'uint8', 'WEATHER_rain': 'uint8', 'WEATHER_sun': 'uint8', 'WEATHER_wind': 'uint8', 'RANDOM': 'float64', 'OUTPUT': 'float64'}
DataFrame head:
TEMPERATURE DAY_OF_WEEK_NUM_0 DAY_OF_WEEK_NUM_1 DAY_OF_WEEK_NUM_2 \
0 26.301237 0 1 0
1 12.531525 0 0 1
2 16.882871 0 0 0
3 23.360067 0 0 0
4 17.666635 0 0 0
DAY_OF_WEEK_NUM_3 DAY_OF_WEEK_NUM_4 DAY_OF_WEEK_NUM_5 DAY_OF_WEEK_NUM_6 \
0 0 0 0 0
1 0 0 0 0
2 1 0 0 0
3 0 1 0 0
4 0 0 1 0
WEATHER_cloud WEATHER_rain WEATHER_sun WEATHER_wind RANDOM \
0 0 0 1 0 8.423872
1 0 0 0 1 10.156235
2 0 0 0 1 5.379346
3 0 0 0 1 -7.973232
4 0 1 0 0 1.103741
OUTPUT
0 807.522026
1 392.427469
2 526.368373
3 715.231729
4 523.338592
DataFrame description:
TEMPERATURE DAY_OF_WEEK_NUM_0 DAY_OF_WEEK_NUM_1 DAY_OF_WEEK_NUM_2 \
count 40.000000 40.000000 40.00000 40.00000
mean 21.078239 0.125000 0.15000 0.15000
std 6.419081 0.334932 0.36162 0.36162
min 10.056193 0.000000 0.00000 0.00000
25% 16.709134 0.000000 0.00000 0.00000
50% 19.833784 0.000000 0.00000 0.00000
75% 25.429544 0.000000 0.00000 0.00000
max 34.000064 1.000000 1.00000 1.00000
DAY_OF_WEEK_NUM_3 DAY_OF_WEEK_NUM_4 DAY_OF_WEEK_NUM_5 \
count 40.00000 40.00000 40.00000
mean 0.15000 0.15000 0.15000
std 0.36162 0.36162 0.36162
min 0.00000 0.00000 0.00000
25% 0.00000 0.00000 0.00000
50% 0.00000 0.00000 0.00000
75% 0.00000 0.00000 0.00000
max 1.00000 1.00000 1.00000
DAY_OF_WEEK_NUM_6 WEATHER_cloud WEATHER_rain WEATHER_sun \
count 40.000000 40.000000 40.000000 40.000000
mean 0.125000 0.225000 0.225000 0.250000
std 0.334932 0.422902 0.422902 0.438529
min 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000 0.250000
max 1.000000 1.000000 1.000000 1.000000
WEATHER_wind RANDOM OUTPUT
count 40.000000 40.000000 40.000000
mean 0.300000 -1.882978 640.120576
std 0.464095 8.819785 193.694019
min 0.000000 -23.149646 312.619056
25% 0.000000 -7.968446 517.410262
50% 0.000000 -0.545387 607.037082
75% 1.000000 4.239111 752.048488
max 1.000000 13.524814 1040.510861
# Per-attribute statistics of the encoded data frame (type, null count, unique values).
df_explorer.print_attr_stats(df=df_data_transformed)
Attribute Name: TEMPERATURE Attribute type: float64 Number of Null values: 0 Number of unique values is:40 Percentage of unique values is: 1.0
############################################# Attribute Name: DAY_OF_WEEK_NUM_0 Attribute type: uint8 Number of Null values: 0 Number of unique values is:2 Percentage of unique values is: 0.05 Summation of unique values per ID: 0 35 1 5 Name: DAY_OF_WEEK_NUM_0, dtype: int64
############################################# Attribute Name: DAY_OF_WEEK_NUM_1 Attribute type: uint8 Number of Null values: 0 Number of unique values is:2 Percentage of unique values is: 0.05 Summation of unique values per ID: 0 34 1 6 Name: DAY_OF_WEEK_NUM_1, dtype: int64
############################################# Attribute Name: DAY_OF_WEEK_NUM_2 Attribute type: uint8 Number of Null values: 0 Number of unique values is:2 Percentage of unique values is: 0.05 Summation of unique values per ID: 0 34 1 6 Name: DAY_OF_WEEK_NUM_2, dtype: int64
############################################# Attribute Name: DAY_OF_WEEK_NUM_3 Attribute type: uint8 Number of Null values: 0 Number of unique values is:2 Percentage of unique values is: 0.05 Summation of unique values per ID: 0 34 1 6 Name: DAY_OF_WEEK_NUM_3, dtype: int64
############################################# Attribute Name: DAY_OF_WEEK_NUM_4 Attribute type: uint8 Number of Null values: 0 Number of unique values is:2 Percentage of unique values is: 0.05 Summation of unique values per ID: 0 34 1 6 Name: DAY_OF_WEEK_NUM_4, dtype: int64
############################################# Attribute Name: DAY_OF_WEEK_NUM_5 Attribute type: uint8 Number of Null values: 0 Number of unique values is:2 Percentage of unique values is: 0.05 Summation of unique values per ID: 0 34 1 6 Name: DAY_OF_WEEK_NUM_5, dtype: int64
############################################# Attribute Name: DAY_OF_WEEK_NUM_6 Attribute type: uint8 Number of Null values: 0 Number of unique values is:2 Percentage of unique values is: 0.05 Summation of unique values per ID: 0 35 1 5 Name: DAY_OF_WEEK_NUM_6, dtype: int64
############################################# Attribute Name: WEATHER_cloud Attribute type: uint8 Number of Null values: 0 Number of unique values is:2 Percentage of unique values is: 0.05 Summation of unique values per ID: 0 31 1 9 Name: WEATHER_cloud, dtype: int64
############################################# Attribute Name: WEATHER_rain Attribute type: uint8 Number of Null values: 0 Number of unique values is:2 Percentage of unique values is: 0.05 Summation of unique values per ID: 0 31 1 9 Name: WEATHER_rain, dtype: int64
############################################# Attribute Name: WEATHER_sun Attribute type: uint8 Number of Null values: 0 Number of unique values is:2 Percentage of unique values is: 0.05 Summation of unique values per ID: 0 30 1 10 Name: WEATHER_sun, dtype: int64
############################################# Attribute Name: WEATHER_wind Attribute type: uint8 Number of Null values: 0 Number of unique values is:2 Percentage of unique values is: 0.05 Summation of unique values per ID: 0 28 1 12 Name: WEATHER_wind, dtype: int64
############################################# Attribute Name: RANDOM Attribute type: float64 Number of Null values: 0 Number of unique values is:40 Percentage of unique values is: 1.0
############################################# Attribute Name: OUTPUT Attribute type: float64 Number of Null values: 0 Number of unique values is:40 Percentage of unique values is: 1.0
#############################################
# Basic train/test data with every available target encoding
# (regression, binary, ternary dense, ternary one-hot).
data_gen = IncomeWeatherDataGenerator()
(
    (X_train, X_test),
    (Y_train_reg, Y_test_reg),
    (Y_train_bin, Y_test_bin),
    (Y_train_ter, Y_test_ter),
    (Y_train_ter_oh, Y_test_ter_oh),
) = data_gen.generate_basic_ml_data()

# Report the shapes: all training arrays first, a separator, then the test arrays.
for train_part in (X_train, Y_train_reg, Y_train_bin, Y_train_ter, Y_train_ter_oh):
    print(train_part.shape)
print("\n")
for test_part in (X_test, Y_test_reg, Y_test_bin, Y_test_ter, Y_test_ter_oh):
    print(test_part.shape)
(5479, 12) (5479, 1) (5479, 1) (5479, 1) (5479, 3) (1827, 12) (1827, 1) (1827, 1) (1827, 1) (1827, 3)
# Class balance of the binary and ternary targets: train counts, a separator, test counts.
for train_labels in (Y_train_bin, Y_train_ter):
    print(Counter(train_labels.reshape(train_labels.shape[0]).tolist()))
print("\n")
for test_labels in (Y_test_bin, Y_test_ter):
    print(Counter(test_labels.reshape(test_labels.shape[0]).tolist()))
Counter({0: 2854, 1: 2625})
Counter({1: 2967, 0: 1411, 2: 1101})
Counter({0: 956, 1: 871})
Counter({1: 975, 0: 469, 2: 383})
# Multi-dimensional train/test data with every available target encoding
# (regression, binary, ternary dense, ternary one-hot).
data_gen = IncomeWeatherDataGenerator()
(
    (X_train, X_test),
    (Y_train_reg, Y_test_reg),
    (Y_train_bin, Y_test_bin),
    (Y_train_ter, Y_test_ter),
    (Y_train_ter_oh, Y_test_ter_oh),
) = data_gen.generate_multidim_ml_data()

# Report the shapes: all training arrays first, a separator, then the test arrays.
for train_part in (X_train, Y_train_reg, Y_train_bin, Y_train_ter, Y_train_ter_oh):
    print(train_part.shape)
print("\n")
for test_part in (X_test, Y_test_reg, Y_test_bin, Y_test_ter, Y_test_ter_oh):
    print(test_part.shape)
(5460, 20, 13) (5460, 1) (5460, 1) (5460, 1) (5460, 3) (1808, 20, 13) (1808, 1) (1808, 1) (1808, 1) (1808, 3)
# Class balance of the binary and ternary targets: train counts, a separator, test counts.
for train_labels in (Y_train_bin, Y_train_ter):
    print(Counter(train_labels.reshape(train_labels.shape[0]).tolist()))
print("\n")
for test_labels in (Y_test_bin, Y_test_ter):
    print(Counter(test_labels.reshape(test_labels.shape[0]).tolist()))
Counter({1: 2908, 0: 2552})
Counter({1: 2604, 2: 1568, 0: 1288})
Counter({1: 953, 0: 855})
Counter({1: 872, 2: 499, 0: 437})
# Stop the notebook-wide timer; the total process duration is written to the log.
Logger().end_timer()
2023-06-05 15:07:30,076 - file_console - DEBUG - Process: NOTEBOOK; Notebook name: income_weather_data_generator_documentation.py; Timer ended; Process Duration [s]: 1.53; Process Duration [m]: 0.03